1 Summary

2 Preparations

2.1 libraries

library(dplyr)
library(readr)
library(purrr)
library(knitr)

library(datamodelr)
library(nycflights13)

options(scipen=10, readr.num_columns = 0)
opts_chunk$set(
  #out.width = '\\maxwidth'
  highlight = T
)

2.2 Data Prep

dm_f <- dm_from_data_frames(flights, airlines, weather, airports, planes)

2.3 Data Check

map(list(flights, airlines, weather, airports, planes), str)
## Classes 'tbl_df', 'tbl' and 'data.frame':    336776 obs. of  19 variables:
##  $ year          : int  2013 2013 2013 2013 2013 2013 2013 2013 2013 2013 ...
##  $ month         : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ day           : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ dep_time      : int  517 533 542 544 554 554 555 557 557 558 ...
##  $ sched_dep_time: int  515 529 540 545 600 558 600 600 600 600 ...
##  $ dep_delay     : num  2 4 2 -1 -6 -4 -5 -3 -3 -2 ...
##  $ arr_time      : int  830 850 923 1004 812 740 913 709 838 753 ...
##  $ sched_arr_time: int  819 830 850 1022 837 728 854 723 846 745 ...
##  $ arr_delay     : num  11 20 33 -18 -25 12 19 -14 -8 8 ...
##  $ carrier       : chr  "UA" "UA" "AA" "B6" ...
##  $ flight        : int  1545 1714 1141 725 461 1696 507 5708 79 301 ...
##  $ tailnum       : chr  "N14228" "N24211" "N619AA" "N804JB" ...
##  $ origin        : chr  "EWR" "LGA" "JFK" "JFK" ...
##  $ dest          : chr  "IAH" "IAH" "MIA" "BQN" ...
##  $ air_time      : num  227 227 160 183 116 150 158 53 140 138 ...
##  $ distance      : num  1400 1416 1089 1576 762 ...
##  $ hour          : num  5 5 5 5 6 5 6 6 6 6 ...
##  $ minute        : num  15 29 40 45 0 58 0 0 0 0 ...
##  $ time_hour     : POSIXct, format: "2013-01-01 05:00:00" "2013-01-01 05:00:00" ...
## Classes 'tbl_df', 'tbl' and 'data.frame':    16 obs. of  2 variables:
##  $ carrier: chr  "9E" "AA" "AS" "B6" ...
##  $ name   : chr  "Endeavor Air Inc." "American Airlines Inc." "Alaska Airlines Inc." "JetBlue Airways" ...
## Classes 'tbl_df', 'tbl' and 'data.frame':    26115 obs. of  15 variables:
##  $ origin    : chr  "EWR" "EWR" "EWR" "EWR" ...
##  $ year      : num  2013 2013 2013 2013 2013 ...
##  $ month     : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ day       : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ hour      : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ temp      : num  39 39 39 39.9 39 ...
##  $ dewp      : num  26.1 27 28 28 28 ...
##  $ humid     : num  59.4 61.6 64.4 62.2 64.4 ...
##  $ wind_dir  : num  270 250 240 250 260 240 240 250 260 260 ...
##  $ wind_speed: num  10.36 8.06 11.51 12.66 12.66 ...
##  $ wind_gust : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ precip    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ pressure  : num  1012 1012 1012 1012 1012 ...
##  $ visib     : num  10 10 10 10 10 10 10 10 10 10 ...
##  $ time_hour : POSIXct, format: "2013-01-01 01:00:00" "2013-01-01 02:00:00" ...
## Classes 'tbl_df', 'tbl' and 'data.frame':    1458 obs. of  8 variables:
##  $ faa  : chr  "04G" "06A" "06C" "06N" ...
##  $ name : chr  "Lansdowne Airport" "Moton Field Municipal Airport" "Schaumburg Regional" "Randall Airport" ...
##  $ lat  : num  41.1 32.5 42 41.4 31.1 ...
##  $ lon  : num  -80.6 -85.7 -88.1 -74.4 -81.4 ...
##  $ alt  : int  1044 264 801 523 11 1593 730 492 1000 108 ...
##  $ tz   : num  -5 -6 -6 -5 -5 -5 -5 -5 -5 -8 ...
##  $ dst  : chr  "A" "A" "A" "A" ...
##  $ tzone: chr  "America/New_York" "America/Chicago" "America/Chicago" "America/New_York" ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   id = col_integer(),
##   ..   name = col_character(),
##   ..   city = col_character(),
##   ..   country = col_character(),
##   ..   faa = col_character(),
##   ..   icao = col_character(),
##   ..   lat = col_double(),
##   ..   lon = col_double(),
##   ..   alt = col_integer(),
##   ..   tz = col_double(),
##   ..   dst = col_character(),
##   ..   tzone = col_character()
##   .. )
## Classes 'tbl_df', 'tbl' and 'data.frame':    3322 obs. of  9 variables:
##  $ tailnum     : chr  "N10156" "N102UW" "N103US" "N104UW" ...
##  $ year        : int  2004 1998 1999 1999 2002 1999 1999 1999 1999 1999 ...
##  $ type        : chr  "Fixed wing multi engine" "Fixed wing multi engine" "Fixed wing multi engine" "Fixed wing multi engine" ...
##  $ manufacturer: chr  "EMBRAER" "AIRBUS INDUSTRIE" "AIRBUS INDUSTRIE" "AIRBUS INDUSTRIE" ...
##  $ model       : chr  "EMB-145XR" "A320-214" "A320-214" "A320-214" ...
##  $ engines     : int  2 2 2 2 2 2 2 2 2 2 ...
##  $ seats       : int  55 182 182 182 55 182 182 182 182 182 ...
##  $ speed       : int  NA NA NA NA NA NA NA NA NA NA ...
##  $ engine      : chr  "Turbo-fan" "Turbo-fan" "Turbo-fan" "Turbo-fan" ...
## [[1]]
## NULL
## 
## [[2]]
## NULL
## 
## [[3]]
## NULL
## 
## [[4]]
## NULL
## 
## [[5]]
## NULL

3 Example

3.1 Schema Card

dm_create_graph でカード(勝手に命名)を作成する

graph <- dm_create_graph(dm_f, rankdir = "BT", col_attr = c("column", "type"), columnArrows = F)
dm_render_graph(graph)

3.2 Schema Card with primary keys

dm_add_references でprimary key情報(右の値)を付与する

rankdir=BTは Bottom -> Topの意味

dm_f <- dm_add_references(
  dm_f,
  flights$carrier == airlines$carrier,
  flights$origin == airports$faa,
  flights$dest == airports$faa,
  flights$tailnum == planes$tailnum,
  weather$origin == airports$faa
)
graph <- dm_create_graph(dm_f, rankdir = "BT", col_attr = c("column", "type"), edge_attrs = "dir = both, arrowtail = crow, arrowhead = odiamond", columnArrows = F)
dm_render_graph(graph)

3.3 Colored Schema Card with primary keys

dm_set_display を通すことで色をつけることができる

display <- list(
  accent1 = c("flights"),
  accent2 = c("airlines"),
  accent3 = c("weather"),
  accent4 = c("airports"),
  accent6 = c("planes")
)

dm_f <- dm_add_references(
  dm_f,
  flights$carrier == airlines$carrier,
  flights$origin == airports$faa,
  flights$dest == airports$faa,
  flights$tailnum == planes$tailnum,
  weather$origin == airports$faa
)

dm_f <- dm_set_display(dm_f, display)
graph <- dm_create_graph(dm_f, rankdir = "BT", col_attr = c("column", "type"), edge_attrs = "dir = both, arrowtail = crow, arrowhead = odiamond", columnArrows = F)
dm_render_graph(graph)

3.4 Colored Schema Card with primary keys and segment

dm_set_segment でセグメントを入れれる

display <- list(
  accent1 = c("flights"),
  accent2 = c("airlines"),
  accent3 = c("weather"),
  accent4 = c("airports"),
  accent6 = c("planes")
)

dm_f <- dm_add_references(
  dm_f,
  flights$carrier == airlines$carrier,
  flights$origin == airports$faa,
  flights$dest == airports$faa,
  flights$tailnum == planes$tailnum,
  weather$origin == airports$faa
)

dm_f <- dm_set_display(dm_f, display)

table_segments <- list(
  A = c("flights", "airlines"),
  B = c("weather"),
  C = c("airports", "planes")
)

dm_f <- dm_set_segment(dm_f, table_segments)

graph <- dm_create_graph(dm_f, rankdir = "BT", col_attr = c("column", "type"), edge_attrs = "dir = both, arrowtail = crow, arrowhead = odiamond", columnArrows = F)
dm_render_graph(graph)

3.5 Shiny

リアルタイムで作れるのもある。すご

# install.packages("shinyAce")
shiny::runApp(system.file("shiny", package = "datamodelr"))
LS0tCnRpdGxlOiAiRXggZGF0YW1vZGVsciIKYXV0aG9yOiAiZ2luZ2k5OSIKZGF0ZTogJ2ByIFN5cy50aW1lKClgJwpvdXRwdXQ6CiAgcm1kZm9ybWF0czo6aHRtbF9jbGVhbjoKICAgIG51bWJlcl9zZWN0aW9uczogdHJ1ZQogICAgZmlnX3dpZHRoOiA3CiAgICBmaWdfaGVpZ2h0OiA0LjUKICAgIGZpZ19jYXB0aW9uOiB0cnVlCiAgICBjb2RlX2ZvbGRpbmc6IGhpZGUKICAgIGNvZGVfZG93bmxvYWQ6IHRydWUKa25pdDogKGZ1bmN0aW9uKGlucHV0RmlsZSwgZW5jb2RpbmcpIHsgCiAgICAgIHJtYXJrZG93bjo6cmVuZGVyKGlucHV0RmlsZSwKICAgICAgICAgICAgICAgICAgICAgICAgZW5jb2Rpbmc9ZW5jb2RpbmcsIAogICAgICAgICAgICAgICAgICAgICAgICBvdXRwdXRfZGlyID0gIi4uL2RvY3MvIikgfSkKLS0tCgojIFN1bW1hcnkgCgoqIGRhdGFtb2RlbHLjgpLkvb/jgaPjgabjgb/jgosKICAgICogZ2l0aHViIDogaHR0cHM6Ly9naXRodWIuY29tL2JlcmdhbnQvZGF0YW1vZGVscgoKIyBQcmVwYXJhdGlvbnMgey50YWJzZXQgLnRhYnNldC1mYWRlIC50YWJzZXQtcGlsbHN9CgojIyBsaWJyYXJpZXMKCmBgYHtyLCBtZXNzYWdlPUYsIHdhcm5pbmc9Rn0KbGlicmFyeShkcGx5cikKbGlicmFyeShyZWFkcikKbGlicmFyeShwdXJycikKbGlicmFyeShrbml0cikKCmxpYnJhcnkoZGF0YW1vZGVscikKbGlicmFyeShueWNmbGlnaHRzMTMpCgpvcHRpb25zKHNjaXBlbj0xMCwgcmVhZHIubnVtX2NvbHVtbnMgPSAwKQpvcHRzX2NodW5rJHNldCgKICAjb3V0LndpZHRoID0gJ1xcbWF4d2lkdGgnCiAgaGlnaGxpZ2h0ID0gVAopCmBgYAoKIyMgRGF0YSBQcmVwCgpgYGB7cn0KZG1fZiA8LSBkbV9mcm9tX2RhdGFfZnJhbWVzKGZsaWdodHMsIGFpcmxpbmVzLCB3ZWF0aGVyLCBhaXJwb3J0cywgcGxhbmVzKQpgYGAKCiMjIERhdGEgQ2hlY2sKCmBgYHtyfQptYXAobGlzdChmbGlnaHRzLCBhaXJsaW5lcywgd2VhdGhlciwgYWlycG9ydHMsIHBsYW5lcyksIHN0cikKYGBgCgojIEV4YW1wbGUKCiMjIFNjaGVtYSBDYXJkCgpgYGBkbV9jcmVhdGVfZ3JhcGhgYGAg44Gn44Kr44O844OJ77yI5Yud5omL44Gr5ZG95ZCN77yJ44KS5L2c5oiQ44GZ44KLCgpgYGB7cn0KZ3JhcGggPC0gZG1fY3JlYXRlX2dyYXBoKGRtX2YsIHJhbmtkaXIgPSAiQlQiLCBjb2xfYXR0ciA9IGMoImNvbHVtbiIsICJ0eXBlIiksIGNvbHVtbkFycm93cyA9IEYpCmRtX3JlbmRlcl9ncmFwaChncmFwaCkKYGBgCgojIyBTY2hlbWEgQ2FyZCB3aXRoIHByaW1hcnkga2V5cwoKYGBgZG1fYWRkX3JlZmVyZW5jZXNgYGAg44GncHJpbWFyeSBrZXnmg4XloLHvvIjlj7Pjga7lgKTvvInjgpLku5jkuI7jgZnjgosKCmBgYHJhbmtkaXI9QlRgYGDjga8gQm90dG9tIC0+IFRvcOOBruaEj+WRswoKYGBge3J9CmRtX2YgPC0gZG1fYWRkX3JlZmVyZW5jZXMoCiAgZG1fZiwKICBmbGlnaHRzJGNhcnJpZXIgPT0gYWlybGluZXMkY2FycmllciwKICBmbGlnaHRzJG9yaWdpbiA9PSBhaXJwb3J0cyRmYWEsCiAgZmxpZ2h0cyRkZXN0ID09IGFpcnBvcnRzJGZhYSwKICBmbGlnaHRzJHRhaWxudW0gPT0gcGxhbmVzJHRhaWxudW0sCiAgd2VhdGhlciRvcmlnaW4gPT0gYWlycG9ydHMkZmFhCikKZ3JhcGggPC0gZG1fY3JlYXRlX2dyYXBoKGRtX2YsIHJhbmtkaXIgPSAiQlQiLCBjb2xfYXR0ciA9IGMoImNvbHVtbiIsICJ0eXBlIiksIGVkZ2VfYXR0cnMgPSAiZGlyID0gYm90aCwgYXJyb3d0YWlsID0gY3JvdywgYXJyb3doZWFkID0gb2RpYW1vbmQiLCBjb2x1bW5BcnJvd3MgPSBGKQpkbV9yZW5kZXJfZ3JhcGgoZ3JhcGgpCmBgYAoKIyMgQ29sb3JlZCBTY2hlbWEgQ2FyZCB3aXRoIHByaW1hcnkga2V5cyAKCmBgYGRtX3NldF9kaXNwbGF5YGBgIOOCkumAmuOBmeOBk+OBqOOBp+iJsuOCkuOBpOOBkeOCi+OBk+OBqOOBjOOBp+OBjeOCiwoKYGBge3J9CmRpc3BsYXkgPC0gbGlzdCgKICBhY2NlbnQxID0gYygiZmxpZ2h0cyIpLAogIGFjY2VudDIgPSBjKCJhaXJsaW5lcyIpLAogIGFjY2VudDMgPSBjKCJ3ZWF0aGVyIiksCiAgYWNjZW50NCA9IGMoImFpcnBvcnRzIiksCiAgYWNjZW50NiA9IGMoInBsYW5lcyIpCikKCmRtX2YgPC0gZG1fYWRkX3JlZmVyZW5jZXMoCiAgZG1fZiwKICBmbGlnaHRzJGNhcnJpZXIgPT0gYWlybGluZXMkY2FycmllciwKICBmbGlnaHRzJG9yaWdpbiA9PSBhaXJwb3J0cyRmYWEsCiAgZmxpZ2h0cyRkZXN0ID09IGFpcnBvcnRzJGZhYSwKICBmbGlnaHRzJHRhaWxudW0gPT0gcGxhbmVzJHRhaWxudW0sCiAgd2VhdGhlciRvcmlnaW4gPT0gYWlycG9ydHMkZmFhCikKCmRtX2YgPC0gZG1fc2V0X2Rpc3BsYXkoZG1fZiwgZGlzcGxheSkKZ3JhcGggPC0gZG1fY3JlYXRlX2dyYXBoKGRtX2YsIHJhbmtkaXIgPSAiQlQiLCBjb2xfYXR0ciA9IGMoImNvbHVtbiIsICJ0eXBlIiksIGVkZ2VfYXR0cnMgPSAiZGlyID0gYm90aCwgYXJyb3d0YWlsID0gY3JvdywgYXJyb3doZWFkID0gb2RpYW1vbmQiLCBjb2x1bW5BcnJvd3MgPSBGKQpkbV9yZW5kZXJfZ3JhcGgoZ3JhcGgpCmBgYAoKIyMgQ29sb3JlZCBTY2hlbWEgQ2FyZCB3aXRoIHByaW1hcnkga2V5cyBhbmQgc2VnbWVudAoKYGBgZG1fc2V0X3NlZ21lbnRgYGAg44Gn44K744Kw44Oh44Oz44OI44KS5YWl44KM44KM44KLCgpgYGB7cn0KZGlzcGxheSA8LSBsaXN0KAogIGFjY2VudDEgPSBjKCJmbGlnaHRzIiksCiAgYWNjZW50MiA9IGMoImFpcmxpbmVzIiksCiAgYWNjZW50MyA9IGMoIndlYXRoZXIiKSwKICBhY2NlbnQ0ID0gYygiYWlycG9ydHMiKSwKICBhY2NlbnQ2ID0gYygicGxhbmVzIikKKQoKZG1fZiA8LSBkbV9hZGRfcmVmZXJlbmNlcygKICBkbV9mLAogIGZsaWdodHMkY2FycmllciA9PSBhaXJsaW5lcyRjYXJyaWVyLAogIGZsaWdodHMkb3JpZ2luID09IGFpcnBvcnRzJGZhYSwKICBmbGlnaHRzJGRlc3QgPT0gYWlycG9ydHMkZmFhLAogIGZsaWdodHMkdGFpbG51bSA9PSBwbGFuZXMkdGFpbG51bSwKICB3ZWF0aGVyJG9yaWdpbiA9PSBhaXJwb3J0cyRmYWEKKQoKZG1fZiA8LSBkbV9zZXRfZGlzcGxheShkbV9mLCBkaXNwbGF5KQoKdGFibGVfc2VnbWVudHMgPC0gbGlzdCgKICBBID0gYygiZmxpZ2h0cyIsICJhaXJsaW5lcyIpLAogIEIgPSBjKCJ3ZWF0aGVyIiksCiAgQyA9IGMoImFpcnBvcnRzIiwgInBsYW5lcyIpCikKCmRtX2YgPC0gZG1fc2V0X3NlZ21lbnQoZG1fZiwgdGFibGVfc2VnbWVudHMpCgpncmFwaCA8LSBkbV9jcmVhdGVfZ3JhcGgoZG1fZiwgcmFua2RpciA9ICJCVCIsIGNvbF9hdHRyID0gYygiY29sdW1uIiwgInR5cGUiKSwgZWRnZV9hdHRycyA9ICJkaXIgPSBib3RoLCBhcnJvd3RhaWwgPSBjcm93LCBhcnJvd2hlYWQgPSBvZGlhbW9uZCIsIGNvbHVtbkFycm93cyA9IEYpCmRtX3JlbmRlcl9ncmFwaChncmFwaCkKYGBgCgoKIyMgU2hpbnkKCuODquOCouODq+OCv+OCpOODoOOBp+S9nOOCjOOCi+OBruOCguOBguOCi+OAguOBmeOBlAoKYGBge3IsIGV2YWwgPSBGfQojIGluc3RhbGwucGFja2FnZXMoInNoaW55QWNlIikKc2hpbnk6OnJ1bkFwcChzeXN0ZW0uZmlsZSgic2hpbnkiLCBwYWNrYWdlID0gImRhdGFtb2RlbHIiKSkKYGBgCg==